/*
 * routines for sending data through a myrinet tunnel to another FMA.
 * These messages are sent using raw, unreliable packets, and so must
 * usually be fragmented.
 *
 * Tunneled messages start with an intro packet that contains a little
 * data.  Each subsequent packet carries the max data possible until
 * the entire message is sent.  Each message has a unique ID which is
 * contained in each fragment packet.
 *
 * Acknowledgements are currently sent for each packet, but the
 * protocol is designed to allow ACK aggregation.
 */
#include <sys/types.h>
#include <netinet/in.h>

#include "libfma.h"
#include "lf_fabric.h"
#include "lf_xbar32.h"
#include "lf_scheduler.h"
#include "lf_myri_packet.h"
#include "libmyri.h"

#include "fma.h"
#include "fma_myri.h"
#include "fma_fms.h"
#include "fma_tunnel.h"
#include "fma_fabric.h"
#include "fma_myri_packet.h"

/* Tunnel debug logging is enabled whenever A.debug is greater than 1 */
#define TUN_DEBUG (A.debug > 1)

/*
 * local prototypes
 */
/* transmit side */
static void fma_tunnel_fill_start_pkt(struct fma_tunnel_tx_desc *ttdp,
                                      int frag_len);
static void fma_tunnel_start_tx_timer(void *vttdp);
static void fma_tunnel_tx_timeout(void *vttdp);
static void fma_tunnel_tx_failed(struct fma_tunnel_tx_desc *ttdp);
static void fma_tunnel_tx_done(struct fma_tunnel_tx_desc *ttdp);
static void fma_tunnel_fill_body_packet(struct fma_tunnel_tx_desc *ttdp,
                                        int frag_len);
/* receive side */
static void fma_tunnel_send_ack(struct fma_tunnel_rx_desc *trdp);
static void fma_tunnel_rx_complete(struct fma_tunnel_rx_desc *trdp);
/* descriptor lookup, timers and ID generation */
static struct fma_tunnel_tx_desc *fma_tunnel_find_tx_desc(int msg_id);
static struct fma_tunnel_rx_desc *fma_tunnel_find_rx_desc(int msg_id,
                                                lf_mac_addr_t mac_addr);
static void fma_tunnel_rx_timeout(void *vtrdp);
static int fma_tunnel_next_id(void);

/*
 * Allocate the tunnel state, set up its (empty) transmit and receive
 * descriptor lists, and store it in the global struct.  The error path
 * calls fma_perror_exit(1).
 */
void
fma_init_tunnel_vars()
{
  struct fma_tunnel *tunnel;

  LF_CALLOC(tunnel, struct fma_tunnel, 1);

  /* the lists are circular, so an empty list is an anchor linked to itself */
  tunnel->rx_anchor.prev = &tunnel->rx_anchor;
  tunnel->rx_anchor.next = &tunnel->rx_anchor;
  tunnel->tx_anchor.prev = &tunnel->tx_anchor;
  tunnel->tx_anchor.next = &tunnel->tx_anchor;

  /* publish the tunnel state through the global struct */
  A.tunnel = tunnel;
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Transmit oriented routines
 */

/*
 * Return a unique ID for a tunneled message.  0 and -1 are not a valid IDs
 */
static int
fma_tunnel_next_id()
{
  /* 0 and -1 are not a valid message IDs */
  if (A.tunnel->tunnel_msg_id == 0 || A.tunnel->tunnel_msg_id == -1) {
    A.tunnel->tunnel_msg_id = 1;
  }
  return A.tunnel->tunnel_msg_id++;
}

/*
 * Send a message to another FMA through a tunnel built from raw send
 * fragments.  This layers a reliable transport, capable of carrying very
 * large messages, on top of the unreliable raw packet transport.
 * The window size is currently 1 (send, wait for ACK, send, wait for
 * ACK, ...), which is not fast but is very simple.
 *
 * The message is given as two segments that are concatenated on the wire,
 * which makes it easy for a caller to prepend a header.  msg1 must fit
 * inside the first packet so that, from the caller's point of view, it is
 * consumed synchronously.
 *
 *  msg1, length1    - first segment (at most LF_MAX_TUNNEL_START_LEN bytes)
 *  msg2, length2    - second segment
 *  route, route_len - route to the destination; copied into the descriptor
 *  nip, port        - NIC and port to send on
 *  complete_rtn     - if non-NULL, called with context once the final
 *                     fragment has been ACKed
 *  error_rtn        - if non-NULL, called with context when retries run out
 *  context          - opaque value handed to the callbacks
 *
 * Returns the message ID assigned to this send (the value that
 * fma_tunnel_cancel_send() takes).  The error path calls fma_perror_exit(1).
 */
int
fma_tunnel_send(
  void *msg1,
  int length1,
  void *msg2,
  int length2,
  uint8_t *route,
  int route_len,
  struct fma_nic_info *nip,
  int port,
  void (*complete_rtn)(void *),
  void (*error_rtn)(void *),
  void *context)
{
  struct fma_tunnel_tx_desc *desc;
  struct fma_tunnel_tx_desc *anchor;
  struct fma_myri_packet *pkt_buf;
  int total_len;
  int first_len;
  int rc;

  anchor = &A.tunnel->tx_anchor;

  /* the whole header segment has to travel in the start packet */
  if (length1 > LF_MAX_TUNNEL_START_LEN) {
    LF_ERROR(("Header segment too long"));
  }

  /* allocate the descriptor and the packet buffer it will reuse */
  LF_CALLOC(desc, struct fma_tunnel_tx_desc, 1);
  pkt_buf = (struct fma_myri_packet *) malloc(LF_MAX_MYRI_RAW_PKT_LEN);
  if (pkt_buf == NULL) {
    LF_ERROR(("Error allocating tunnel packet buffer"));
  }

  /*
   * Hack to support 0 length messages: send 4 placeholder bytes taken from
   * the global struct instead of an empty message.  XXX
   */
  if (length1 == 0) {
    msg1 = &A;
    length1 = 4;
    length2 = 0;
  }

  /* record who to notify and how to reach the destination */
  desc->context = context;
  desc->complete_rtn = complete_rtn;
  desc->error_rtn = error_rtn;
  desc->pkt_ptr = pkt_buf;
  desc->nic_handle = nip->nic_handle;
  desc->port = port;
  LF_MAC_COPY(desc->my_mac_addr, nip->myri_info.mac_addr);
  desc->route_len = route_len;
  memcpy(desc->route, route, route_len);

  /* record what is being sent */
  desc->msg1 = msg1;
  desc->length1 = length1;
  desc->msg2 = msg2;
  desc->length2 = length2;
  desc->msg_id = fma_tunnel_next_id();

  /* the start packet carries as much of the message as it can hold */
  total_len = desc->length1 + desc->length2;
  first_len = LF_MAX_TUNNEL_START_LEN;
  if (total_len <= LF_MAX_TUNNEL_START_LEN) {
    first_len = total_len;
  }
  fma_tunnel_fill_start_pkt(desc, first_len);

  /* the next fragment picks up where the start packet leaves off */
  desc->offset_to_send = first_len;
  desc->last_pkt_len = LF_TUNNEL_START_PKT_LEN(first_len);
  desc->retries_left = FMA_TUNNEL_NUM_RETRIES;

  /* launch the start packet; the callback arms the ACK timer */
  rc = fma_myri_raw_send(desc->nic_handle, port, route, route_len,
                         pkt_buf, desc->last_pkt_len,
                         fma_tunnel_start_tx_timer, desc);
  if (rc != 0) {
    LF_ERROR(("Error sending tunnel packet"));
  }
  desc->sends_pending = 1;

  /* insert the descriptor at the head of the TX list */
  desc->prev = anchor;
  desc->next = anchor->next;
  desc->next->prev = desc;
  anchor->next = desc;

  return desc->msg_id;

 except:
  fma_perror_exit(1);
  return 0;
}

#if 0
/*
 * Debugging aid (compiled out): log the message ID and next send offset of
 * every descriptor on the TX list.  Bails out with exit(1) once more than
 * 100 entries have been walked.
 */
static void
dump_tx_list()
{
  struct fma_tunnel_tx_desc *ttdp;
  int i;

  i=0;
  fma_log("TX list:");
  ttdp = A.tunnel->tx_anchor.next;
  while (ttdp != &A.tunnel->tx_anchor) {
    fma_log("\tID %d, offset=%d", ttdp->msg_id, ttdp->offset_to_send);
    ttdp = ttdp->next;
    if (++i > 100) {
      fma_log("BAILING OUT!");
      exit(1);
    }
  }
}
#endif

/*
 * Callback handed to fma_myri_raw_send() for tunnel packets: a packet has
 * been sent.  Once no sends remain pending, either start the ACK timeout
 * timer or, if the descriptor went stale in the meantime, free it.
 */
static void
fma_tunnel_start_tx_timer(
  void *vttdp)
{
  struct fma_tunnel_tx_desc *desc = vttdp;

  /* a timer that is still running is cleared first */
  if (desc->ack_timer != NULL) {
    lf_remove_event(desc->ack_timer);
    desc->ack_timer = NULL;
  }

  /* one fewer send outstanding; only the last one has work to do */
  --desc->sends_pending;
  if (desc->sends_pending > 0) {
    return;
  }

  /* a stale descriptor is already unlinked, so all that is left is to free */
  if (desc->stale) {
    LF_FREE(desc);
    return;
  }

  /* our last send is out: start waiting for the ACK */
  desc->ack_timer = lf_schedule_event(fma_tunnel_tx_timeout, desc,
                                      FMA_TUNNEL_ACK_TIMEOUT);
  if (desc->ack_timer == NULL) {
    LF_ERROR(("Error allocating tunnel ACK timeout"));
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Build the tunnel start packet in the descriptor's packet buffer.
 * frag_len is the number of message bytes (msg1 followed by msg2) that the
 * start packet carries.
 */
static void
fma_tunnel_fill_start_pkt(
  struct fma_tunnel_tx_desc *ttdp,
  int frag_len)
{
  struct fma_myri_packet *pkt;
  struct fma_tunnel_start *start;

  pkt = ttdp->pkt_ptr;
  start = &pkt->u.tunnel_start;

  /* common header: packet type and subtype */
  pkt->h.subtype_16 = htons(FMA_SUBTYPE_FMA_TUNNEL_START);
  pkt->h.type_16 = htons(FMA_PACKET_TYPE);

  /* describe the message and this fragment */
  start->msg_id_32 = htonl(ttdp->msg_id);
  start->msg_length_32 = htonl(ttdp->length1 + ttdp->length2);
  start->frag_len_32 = htonl(frag_len);

  /* tell the receiver who we are and how to route replies back */
  LF_MAC_COPY(start->origin_mac_addr, ttdp->my_mac_addr);
  lf_reverse_route(start->reply_route, ttdp->route, ttdp->route_len);
  start->reply_route_len_8 = ttdp->route_len;

  /* no first segment: the fragment comes entirely from msg2 */
  if (ttdp->length1 <= 0) {
    memcpy(start->data, ttdp->msg2, frag_len);
    return;
  }

  /* fragment fits within the first segment */
  if (frag_len <= ttdp->length1) {
    memcpy(start->data, ttdp->msg1, frag_len);
    return;
  }

  /* all of msg1, followed by the leading part of msg2 */
  memcpy(start->data, ttdp->msg1, ttdp->length1);
  memcpy(start->data + ttdp->length1, ttdp->msg2, frag_len - ttdp->length1);
}

/*
 * ACK timer callback: no ACK arrived within the response time.  Send the
 * last packet again if any retries are left, otherwise fail the send.
 */
static void
fma_tunnel_tx_timeout(
  void *vttdp)
{
  struct fma_tunnel_tx_desc *desc = vttdp;
  int rc;

  /* the timer that got us here has fired, so forget it */
  desc->ack_timer = NULL;

  /* out of retries: the transmit has failed */
  if (desc->retries_left-- <= 0) {
    fma_tunnel_tx_failed(desc);
    return;
  }

  /* otherwise send the same packet once more */
  rc = fma_myri_raw_send(desc->nic_handle, desc->port,
                         desc->route, desc->route_len,
                         desc->pkt_ptr, desc->last_pkt_len,
                         fma_tunnel_start_tx_timer, desc);
  if (rc != 0) {
    LF_ERROR(("Tunnel packet re-transmit failed"));
  }
  ++desc->sends_pending;
  return;

 except:
  fma_perror_exit(1);
}

/*
 * A tunneled send has timed out too many times and is being given up on.
 * Notify the caller through its error routine (if it supplied one), then
 * take the descriptor off the TX list and release it.
 */
static void
fma_tunnel_tx_failed(
  struct fma_tunnel_tx_desc *ttdp)
{
  void (*notify)(void *);

  /* report the failure first, while the descriptor is still on the list */
  notify = ttdp->error_rtn;
  if (notify != NULL) {
    notify(ttdp->context);
  }

  /* splice the descriptor out of the list */
  ttdp->prev->next = ttdp->next;
  ttdp->next->prev = ttdp->prev;

  /* release the packet buffer and then the descriptor itself */
  LF_FREE(ttdp->pkt_ptr);
  LF_FREE(ttdp);
}

/*
 * Got an ACK from the receiver.
 *
 * pkt - the received ACK packet; its tunnel_ack part carries the message ID
 *       and the offset the receiver expects next.
 *
 * ACKs for unknown message IDs, or whose offset does not match the next
 * offset we intend to send, are dropped.  A valid ACK stops the ACK timer
 * and then either completes the send (nothing left to transmit) or sends
 * the next body fragment with a fresh retry count.
 */
void
fma_tunnel_got_ack(
  struct fma_myri_packet *pkt)
{
  struct fma_tunnel_tx_desc *ttdp;
  int ack_offset;
  int frag_len;
  int msg_id;
  int rc;

  /* get the message ID */
  msg_id = ntohl(pkt->u.tunnel_ack.msg_id_32);

  /* use the ID to find a descriptor, drop if no match */
  ttdp = fma_tunnel_find_tx_desc(msg_id);
  if (ttdp == NULL) {
    return;
  }

  /* If the offset is not right, just drop this ACK packet */
  ack_offset = ntohl(pkt->u.tunnel_ack.offset_32);
  if (ack_offset != ttdp->offset_to_send) {
    if (TUN_DEBUG) {
      fma_log("ignoring ACK for msg_id %d, offset %d != %d",
          msg_id, ack_offset, ttdp->offset_to_send);
    }
    return;
  }

  /*
   * We now know this is a valid ACK - stop the timeout timer.  The timer is
   * only started once the send callback has run, so it is not running if
   * the ACK arrives while a send is still pending; only remove it when set.
   */
  if (ttdp->ack_timer != NULL) {
    lf_remove_event(ttdp->ack_timer);
    ttdp->ack_timer = NULL;
  }

  /* find out how much to send */
  frag_len = (ttdp->length1 + ttdp->length2) - ttdp->offset_to_send;
  if (frag_len > LF_MAX_TUNNEL_FRAG_LEN) {
    frag_len = LF_MAX_TUNNEL_FRAG_LEN;
  }

  /* If nothing left, packet is done! */
  if (frag_len <= 0) {
    fma_tunnel_tx_done(ttdp);

  /* otherwise, send the next packet */
  } else {

    /* fill the next packet with frag_len bytes from the current offset */
    fma_tunnel_fill_body_packet(ttdp, frag_len);

    /* adjust offsets */
    ttdp->offset_to_send += frag_len;

    /* reset retry count */
    ttdp->retries_left = FMA_TUNNEL_NUM_RETRIES;
    ttdp->last_pkt_len = LF_TUNNEL_BODY_PKT_LEN(frag_len);

    /* and send it */
    rc = fma_myri_raw_send(ttdp->nic_handle, ttdp->port,
                           ttdp->route, ttdp->route_len,
                           ttdp->pkt_ptr, ttdp->last_pkt_len,
                           fma_tunnel_start_tx_timer, ttdp);
    if (rc != 0) {
      LF_ERROR(("Tunnel packet body transmit failed"));
    }
    ++ttdp->sends_pending;
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Look up the active transmit descriptor for a message ID.
 * Returns NULL if no descriptor on the TX list carries that ID.
 */
static struct fma_tunnel_tx_desc *
fma_tunnel_find_tx_desc(
  int msg_id)
{
  struct fma_tunnel_tx_desc *anchor;
  struct fma_tunnel_tx_desc *cur;

  /* walk the circular list until we come back around to the anchor */
  anchor = &A.tunnel->tx_anchor;
  for (cur = anchor->next; cur != anchor; cur = cur->next) {
    if (cur->msg_id == msg_id) {
      return cur;
    }
  }
  return NULL;
}

/*
 * Cancel an in-progress tunnel send identified by its message ID.
 * The receive side may wonder what happened, but it will eventually time
 * out since we no longer retransmit or react to ACKs for this message.
 * Does nothing if no send with that ID is active.
 */
void
fma_tunnel_cancel_send(
  int msg_id)
{
  struct fma_tunnel_tx_desc *desc;

  desc = fma_tunnel_find_tx_desc(msg_id);
  if (desc == NULL) {
    return;			/* no such send, nothing to cancel */
  }

  /* stop waiting for an ACK */
  if (desc->ack_timer != NULL) {
    lf_remove_event(desc->ack_timer);
    desc->ack_timer = NULL;
  }

  /* take the descriptor off the TX list */
  desc->prev->next = desc->next;
  desc->next->prev = desc->prev;

  /* the packet buffer is released right away */
  LF_FREE(desc->pkt_ptr);

  /*
   * The descriptor can only be freed here if no send is still pending;
   * otherwise it is marked stale and left allocated.
   */
  if (desc->sends_pending <= 0) {
    LF_FREE(desc);
  } else {
    desc->stale = TRUE;
  }
}

/*
 * Build a tunnel body packet in the descriptor's packet buffer, carrying
 * frag_len message bytes starting at the descriptor's current send offset.
 */
static void
fma_tunnel_fill_body_packet(
  struct fma_tunnel_tx_desc *ttdp,
  int frag_len)
{
  struct fma_tunnel_body *body;
  struct fma_myri_packet *pkt;
  int remain1;

  pkt = ttdp->pkt_ptr;
  body = &pkt->u.tunnel_body;

  /* common header: packet type and subtype */
  pkt->h.subtype_16 = htons(FMA_SUBTYPE_FMA_TUNNEL_BODY);
  pkt->h.type_16 = htons(FMA_PACKET_TYPE);

  /* the tunnel_body specific part */
  LF_MAC_COPY(body->origin_mac_addr, ttdp->my_mac_addr);
  body->msg_id_32 = htonl(ttdp->msg_id);
  body->offset_32 = htonl(ttdp->offset_to_send);
  body->frag_len_32 = htonl(frag_len);

  /* how much of the first segment is still unsent? */
  remain1 = ttdp->length1 - ttdp->offset_to_send;

  /* first segment already fully sent: copy from within msg2 */
  if (remain1 <= 0) {
    memcpy(body->data, ttdp->msg2 + ttdp->offset_to_send - ttdp->length1,
           frag_len);
    return;
  }

  /* fragment lies entirely inside what is left of msg1 */
  if (frag_len <= remain1) {
    memcpy(body->data, ttdp->msg1 + ttdp->offset_to_send, frag_len);
    return;
  }

  /* the rest of msg1, followed by the leading part of msg2 */
  memcpy(body->data, ttdp->msg1 + ttdp->offset_to_send, remain1);
  memcpy(body->data + remain1, ttdp->msg2, frag_len - remain1);
}

/*
 * Transmission is complete.  Notify the caller through its completion
 * routine (if it supplied one), then take the descriptor off the TX list
 * and clean up.
 */
static void
fma_tunnel_tx_done(
  struct fma_tunnel_tx_desc *ttdp)
{
  void (*notify)(void *);

  /* report completion first, while the descriptor is still on the list */
  notify = ttdp->complete_rtn;
  if (notify != NULL) {
    notify(ttdp->context);
  }

  /* splice the descriptor out of the list */
  ttdp->prev->next = ttdp->next;
  ttdp->next->prev = ttdp->prev;

  /* the packet buffer is released right away */
  LF_FREE(ttdp->pkt_ptr);

  /*
   * The descriptor can only be freed here if no send is still pending;
   * otherwise it is marked stale and left allocated.
   */
  if (ttdp->sends_pending <= 0) {
    LF_FREE(ttdp);
  } else {
    ttdp->stale = TRUE;
  }
}

/*
 * ===============================================
 * Receive oriented routines
 * ===============================================
 */

/*
 * Got a tunnel start message.  Allocate a descriptor for it and space to
 * hold the message.
 */
void
fma_tunnel_got_start(
  struct fma_nic_info *nip,
  int port,
  struct fma_myri_packet *pkt)
{
  struct fma_tunnel_rx_desc *trdp;
  struct fma_tunnel_start *tsp;
  int msg_id;

  tsp = &pkt->u.tunnel_start; /* pointer to tunnel_start part of msg */

  /* see if msg already in progress */
  msg_id = ntohl(tsp->msg_id_32);
  trdp = fma_tunnel_find_rx_desc(msg_id, tsp->origin_mac_addr);

  /*
   * If no message like this already, set it up.  If message already found,
   * likely our ACK got lost
   */
  if (trdp == NULL) {
    int frag_len;

    /* allocate descriptor */
    LF_CALLOC(trdp, struct fma_tunnel_rx_desc, 1);

    /* Fill in the descriptor */
    trdp->length = ntohl(tsp->msg_length_32);
    trdp->msg_id = msg_id;
    LF_MAC_COPY(trdp->sender_mac_addr, tsp->origin_mac_addr);
    trdp->nip = nip;
    trdp->port = port;
    trdp->reply_route_len = tsp->reply_route_len_8;
    memcpy(trdp->reply_route, tsp->reply_route, trdp->reply_route_len);
    trdp->next_offset = 0;

    if (TUN_DEBUG) {
      fma_log("new Tunnel_start from %s, id=%d, len=%d",
	  fma_mac_to_hostname(trdp->sender_mac_addr),
	  trdp->msg_id, trdp->length);
    }

    /* allocate space for the incoming message */
    trdp->msg = malloc(trdp->length);
    if (trdp->msg == NULL) {
      LF_ERROR(("Error allocating space for %d byte msg"));
    }

    /* link this RX descriptor into the active list */
    trdp->next = A.tunnel->rx_anchor.next;
    trdp->prev = &A.tunnel->rx_anchor;
    trdp->next->prev = trdp;
    trdp->prev->next = trdp;
    
    /* copy in the data */
    frag_len = ntohl(tsp->frag_len_32);
    memcpy(trdp->msg, tsp->data, frag_len);

    /* If message is complete, process it */
    trdp->next_offset = frag_len;

    /*
     * send the ACK for this fragment
     */
    fma_tunnel_send_ack(trdp);

    /* If message is complete, process it */
    if (trdp->next_offset >= trdp->length) {
      fma_tunnel_rx_complete(trdp);
    }

  } else {

    /* Cancel inter-packet timer since we're about to restart it */
    if (trdp->msg_timer != NULL) {
      lf_remove_event(trdp->msg_timer);
      trdp->msg_timer = NULL;
    } else {
      LF_ERROR(("Tunnel descriptor found but timer inactive?"));
    }

    if (TUN_DEBUG) {
      fma_log("DUP Tunnel_start form %s, id=%d, len=%d",
	  fma_mac_to_hostname(trdp->sender_mac_addr),
	  trdp->msg_id, trdp->length);
    }

    /* re-send the ACK for this fragment */
    fma_tunnel_send_ack(trdp);
  }

  /* start the inter-packet message timer */
  trdp->msg_timer = lf_schedule_event(fma_tunnel_rx_timeout, trdp,
		     FMA_TUNNEL_RX_TIMEOUT);
  if (trdp->msg_timer == NULL) LF_ERROR(("Error setting tunnel RX timer"));

  return;

 except:
  fma_perror_exit(1);
}

/*
 * Got a tunnel body packet
 */
void
fma_tunnel_got_body(
  struct fma_myri_packet *pkt)
{
  struct fma_tunnel_rx_desc *trdp;
  struct fma_tunnel_body *tbp;
  int offset;
  int frag_len;
  int msg_id;

  tbp = &pkt->u.tunnel_body;	/* pointer to tunnel body part of msg */
  
  /* find this RX descriptor */
  msg_id = ntohl(tbp->msg_id_32);
  trdp = fma_tunnel_find_rx_desc(msg_id, tbp->origin_mac_addr);

  /* If no such message known, just ignore this */
  if (trdp == NULL) {
    if (TUN_DEBUG) fma_log("Stale tunnel body, ID=%d", msg_id);
    return;
  }

  /* get offset and length.  If not expected offset, ignore the message */
  offset = ntohl(tbp->offset_32);
  frag_len = ntohl(tbp->frag_len_32);

  if (TUN_DEBUG) {
    fma_log("Tunnel_body from %s, id=%d, off=%d, trdp=%p",
      	fma_mac_to_hostname(trdp->sender_mac_addr), msg_id, offset, trdp);
  }

  /* Got a valid message, clear the inter-packet timer */
  lf_remove_event(trdp->msg_timer);
  trdp->msg_timer = NULL;

  /* If this is not the right offset, re-send the last ACK
   * and re-set inter-packet timer
   */
  if (offset != trdp->next_offset) {

    fma_tunnel_send_ack(trdp);		/* send last ACK again */

    /* start the inter-packet message timer */
    trdp->msg_timer = lf_schedule_event(fma_tunnel_rx_timeout, trdp,
		       FMA_TUNNEL_RX_TIMEOUT);
    if (trdp->msg_timer == NULL) LF_ERROR(("Error setting tunnel RX timer"));
    return;
  }
  
  /* copy in the data */
  memcpy(trdp->msg + offset, tbp->data, frag_len);

  /* If message is complete, process it */
  trdp->next_offset += frag_len;
    
  /* ACK the message */
  fma_tunnel_send_ack(trdp);

  /* If message is complete, process it */
  if (trdp->next_offset >= trdp->length) {

    /* process the message and free data */
    fma_tunnel_rx_complete(trdp);
  }

  /*
   * Whether we are done or waiting for the next packet, start
   * the timer.  This is needed so we can deal with dropped last ACKs.
   * (the sender will retransmit last packet, so we need to retransmit
   * last ACK)
   */
  trdp->msg_timer = lf_schedule_event(fma_tunnel_rx_timeout, trdp,
		     FMA_TUNNEL_RX_TIMEOUT);
  if (trdp->msg_timer == NULL) LF_ERROR(("Error setting tunnel RX timer"));
  return;
 
 except:
  fma_perror_exit(1);
}

/*
 * Inter-packet timer callback: nothing more arrived for this message in
 * time, so drop its descriptor.  The message *may* already have completed
 * and been processed; the cleanup steps are the same either way.
 */
static void
fma_tunnel_rx_timeout(
  void *vtrdp)
{
  struct fma_tunnel_rx_desc *desc = vtrdp;
  int finished;

  /* the timer that got us here has fired, so forget it */
  desc->msg_timer = NULL;

  /* an unfinished message is always reported; a finished one only in debug */
  finished = (desc->next_offset >= desc->length);
  if (!finished) {
    fma_log("Incoming msg ID %d from %s timed out, next_offset=%d",
        desc->msg_id, fma_mac_to_hostname(desc->sender_mac_addr),
        desc->next_offset);
  } else if (TUN_DEBUG) {
    fma_log("Incoming msg ID %d from %s COMPLETE",
        desc->msg_id, fma_mac_to_hostname(desc->sender_mac_addr));
  }

  /* splice the descriptor out of the RX list */
  desc->prev->next = desc->next;
  desc->next->prev = desc->prev;

  /* release the message buffer (might already be free) and the descriptor */
  LF_FREE(desc->msg);
  LF_FREE(desc);
}

/*
 * Look up the active receive descriptor for a (msg ID, sender MAC address)
 * combination.  Returns NULL if the RX list holds no such descriptor.
 */
static struct fma_tunnel_rx_desc *
fma_tunnel_find_rx_desc(
  int msg_id,
  lf_mac_addr_t mac_addr)
{
  struct fma_tunnel_rx_desc *anchor;
  struct fma_tunnel_rx_desc *cur;

  /* walk the circular list until we come back around to the anchor */
  anchor = &A.tunnel->rx_anchor;
  for (cur = anchor->next; cur != anchor; cur = cur->next) {
    if (cur->msg_id != msg_id) {
      continue;
    }
    if (LF_MAC_CMP(cur->sender_mac_addr, mac_addr) == 0) {
      return cur;
    }
  }
  return NULL;
}

/*
 * An incoming message has fully arrived: hand it to the receive handler.
 * The descriptor is neither unlinked nor freed here.  It has to stay around
 * for a little while in case our last ACK to the sender was lost; otherwise
 * the sender might not learn that we got the message and could make poor
 * decisions because of it.
 * The message data itself is freed, since nothing needs it after processing.
 */
static void
fma_tunnel_rx_complete(
  struct fma_tunnel_rx_desc *trdp)
{
  if (TUN_DEBUG)
    fma_log("Tunnel RX completion being called for ID %d", trdp->msg_id);

  /* deliver the reassembled message */
  fma_myri_handle_recv(trdp->nip,
                       trdp->port,
                       trdp->msg,
                       trdp->length,
                       trdp->sender_mac_addr);

  /* the data is no longer needed; the descriptor lives on */
  LF_FREE(trdp->msg);
}

/*
 * Send an ACK for the latest packet we received.  The ACK carries the
 * message ID and the offset we expect next, is built in the descriptor's
 * own packet buffer, and is sent back along the saved reply route.
 */
static void
fma_tunnel_send_ack(
  struct fma_tunnel_rx_desc *trdp)
{
  struct fma_myri_packet *ack_pkt;
  struct fma_tunnel_ack *ack;
  int rc;

  ack_pkt = &trdp->pkt;
  ack = &ack_pkt->u.tunnel_ack;

  /* which message, and where we expect the next fragment to start */
  ack->offset_32 = htonl(trdp->next_offset);
  ack->msg_id_32 = htonl(trdp->msg_id);

  /* common header: packet type and subtype */
  ack_pkt->h.subtype_16 = htons(FMA_SUBTYPE_FMA_TUNNEL_ACK);
  ack_pkt->h.type_16 = htons(FMA_PACKET_TYPE);

  /* send it; no completion callback is requested */
  rc = fma_myri_raw_send(trdp->nip->nic_handle, trdp->port,
                         trdp->reply_route, trdp->reply_route_len,
                         ack_pkt, sizeof(*ack_pkt),
                         NULL, NULL);
  if (rc != 0) {
    LF_ERROR(("Error sending tunnel packet"));
  }
  return;

 except:
  fma_perror_exit(1);
}
